from pyspark.sql import SparkSession
import datetime

if __name__ == '__main__':
    # Demo script: load a comma-separated text file into an RDD, convert it
    # to a DataFrame, register it as a temp view, and print the query plans
    # Spark generates for a simple filtered SELECT.
    spark = (
        SparkSession.builder
        .appName("test")
        .master("local[*]")
        .getOrCreate()
    )

    # Ensure the session is shut down even if reading the file, parsing a
    # line, or planning the query raises.
    try:
        sc = spark.sparkContext
        # Optional experiment: set a checkpoint directory so that
        # df.checkpoint() below can be enabled.
        # sc.setCheckpointDir(f"../target/checkpoint/{datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}")

        # Build a DataFrame from an RDD: each input line is split on ","
        # into fields; the first is kept as a string and the second is
        # converted to int.
        rdd = (
            sc.textFile("../data/input/sql/people.txt")
            .map(lambda line: line.split(","))
            .map(lambda fields: (fields[0], int(fields[1])))
        )

        df = spark.createDataFrame(rdd, schema=['name', 'age'])
        # df.show()
        # df = df.checkpoint()
        df.createOrReplaceTempView("people")

        # explain(True) prints the extended plan output rather than
        # executing the query and returning rows.
        spark.sql("select name, age from people where age > 19").explain(True)
    finally:
        spark.stop()
